library(car)
library(mosaic)
library(DT)
library(skimr)
library(tidyverse)

Read in the Data

Made by Jim and Brent. Let’s take a look.

# List the working directory and pick out the entry matching the data file name.
files <- dir()
rb_path <- grep("RBdata.csv", files, value = TRUE)
# Load the CSV (first row holds the column names) and show it as a table widget.
rdat <- read.csv(rb_path, header = TRUE)
datatable(rdat)

Create first Pairs Plot

# One gray level per row (black through light gray); points are coloured by
# the factor code of Y, so the shade tracks the rank of Y.
n_shades <- nrow(rdat)
palette(gray(seq(0, 0.9, length.out = n_shades)))
# Scatterplot matrix of all columns with a smoother drawn in every panel.
pairs(
  rdat,
  panel = panel.smooth,
  pch = 16,
  cex = 1.2,
  col = as.factor(rdat$Y),
  col.smooth = "skyblue4"
)

Shrink Y

# Same gray ramp as before: one shade per row of rdat.
gray_ramp <- gray(seq(0, 0.9, length.out = nrow(rdat)))
palette(gray_ramp)
# Prepend log(Y) as an extra column so it appears alongside the raw variables.
with_log_y <- cbind(logY = log(rdat$Y), rdat)
pairs(
  with_log_y,
  panel = panel.smooth,
  pch = 16,
  cex = 1.2,
  col = as.factor(rdat$Y),
  col.smooth = "skyblue4"
)

Start with X1

# Baseline fit: untransformed Y regressed on X1 only.
lm1 <- lm(formula = Y ~ X1, data = rdat)
summary(lm1)
## 
## Call:
## lm(formula = Y ~ X1, data = rdat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -125.5 -122.1  -78.0  -64.5 6110.2 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    78.75      45.44   1.733   0.0841 .
## X1             46.71      65.58   0.712   0.4769  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 567.5 on 298 degrees of freedom
## Multiple R-squared:  0.001699,   Adjusted R-squared:  -0.001651 
## F-statistic: 0.5073 on 1 and 298 DF,  p-value: 0.4769
# Box-Cox profile for lm1 over a narrow grid of powers from -0.1 to 0.05.
boxCox(lm1, lambda = seq(from = -0.1, to = 0.05, by = 0.01))

# Refit lm1 (overwriting the baseline) with the response raised to the -0.04 power.
lm1 <- lm(formula = Y^-0.04 ~ X1, data = rdat)
summary(lm1)
## 
## Call:
## lm(formula = Y^-0.04 ~ X1, data = rdat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.253368 -0.060524  0.000226  0.061572  0.306126 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.939291   0.007284 128.948   <2e-16 ***
## X1          0.019103   0.010514   1.817   0.0702 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09098 on 298 degrees of freedom
## Multiple R-squared:  0.01096,    Adjusted R-squared:  0.007638 
## F-statistic: 3.301 on 1 and 298 DF,  p-value: 0.07023
# Store the transformed response (same power as the lm1 refit) as a column.
rdat$Yt <- rdat$Y^-0.04
# Move Yt to the front by name instead of by position: the reorder no longer
# depends on rdat having exactly 12 columns and is a no-op if this chunk is
# run a second time (a positional c(12, 1:11) would then shuffle other columns).
rdat <- rdat[, c("Yt", setdiff(names(rdat), "Yt"))]
palette(c("skyblue", "orange", "green"))
# Residuals and fitted values of lm1 next to every variable. Use the accessor
# functions rather than `$res` / `$fit`, which only work via partial matching.
# NOTE(review): X3 is numeric with missing values, so its factor codes cycle
# through the three palette colours and rows with NA get no colour; confirm
# this colouring is what was intended.
pairs(
  cbind(R = residuals(lm1), fit = fitted(lm1), rdat),
  pch = 16,
  cex = 1.2,
  panel = panel.smooth,
  col.smooth = "skyblue4",
  col = as.factor(rdat$X3)
)

Hmm… Some x-variables seem to be transformed…

X3 should be logged…

# Per-column summary of rdat (missing counts, quantiles, inline histograms).
skim(rdat)
## Skim summary statistics
##  n obs: 300 
##  n variables: 12 
## 
## ── Variable type:factor ──────────────────────────────────────────────
##  variable missing complete   n n_unique                     top_counts
##        X7       0      300 300      288 (-<: 2, Ali: 2, Ame: 2, Bob: 2
##  ordered
##    FALSE
## 
## ── Variable type:integer ─────────────────────────────────────────────
##  variable missing complete   n mean  sd p0 p25 p50 p75 p100     hist
##        X1       0      300 300 0.48 0.5  0   0   0   1    1 ▇▁▁▁▁▁▁▇
## 
## ── Variable type:numeric ─────────────────────────────────────────────
##  variable missing complete   n      mean        sd          p0     p25
##       X10       0      300 300   2.06        1.14      0.00081    1.07
##        X2       0      300 300  -3.02        2.1     -21.87      -3.98
##        X3     129      171 300   2.1e+26 2e+27     3e-05       2377.6 
##        X4       0      300 300  -2.39        1.26     -5.66      -3.42
##        X5       0      300 300   3.99        2.98     -2.26       1.4 
##        X6       0      300 300   2.11        1.15      0.011      1.2 
##        X8       0      300 300   4.24        3.96     -3.83       1   
##        X9       0      300 300  -8.32        6.48    -32.63     -12.18
##         Y       0      300 300 101.18      567.06      0.0028     0.7 
##        Yt       0      300 300   0.95        0.091     0.71       0.88
##       p50      p75       p100     hist
##   2.08     3.04       3.97    ▆▆▆▆▇▇▆▇
##  -3.09    -2.35      10.95    ▁▁▁▁▇▁▁▁
##   2.1e+10  3.7e+15    2.1e+28 ▇▁▁▁▁▁▁▁
##  -2.3     -1.4        1.48    ▁▅▆▇▇▆▁▁
##   4.04     6.46      10.84    ▂▇▇▇▇▇▆▁
##   2.27     3.04       3.99    ▆▆▆▆▇▇▇▇
##   1        9          9       ▁▁▁▇▁▁▁▆
##  -6.86    -3.64       8.81    ▁▁▂▃▆▇▃▁
##   3.7     22.35    6235.63    ▇▁▁▁▁▁▁▁
##   0.95     1.01       1.26    ▁▂▇▇▇▂▁▁

X7 seems especially odd… Let’s split it at the " - " separator; maybe the numbers are useful.

# Frequency of each distinct X7 label.
table(rdat[["X7"]])
## 
##   (-<U+203F><U+203F>-) - 124    (-<U+203F><U+203F>-) - 16 
##                            1                            1 
##   (-<U+203F><U+203F>-) - 174    (-<U+203F><U+203F>-) - 18 
##                            1                            1 
##     (-<U+203F><U+203F>-) - 2    (-<U+203F><U+203F>-) - 31 
##                            1                            2 
##    (-<U+203F><U+203F>-) - 32    (-<U+203F><U+203F>-) - 41 
##                            1                            1 
##    (-<U+203F><U+203F>-) - 42     (-<U+203F><U+203F>-) - 6 
##                            1                            1 
##    (-<U+203F><U+203F>-) - 62    (-<U+203F><U+203F>-) - 63 
##                            1                            1 
##     (-<U+203F><U+203F>-) - 7                    Alice - 0 
##                            1                            2 
##                    Alice - 1                   Alice - 10 
##                            1                            1 
##                  Alice - 112                  Alice - 118 
##                            1                            1 
##                  Alice - 136                  Alice - 189 
##                            1                            1 
##                  Alice - 199                    Alice - 3 
##                            1                            1 
##                  Alice - 455                   Alice - 79 
##                            1                            1 
##                 Amelia - 103                 Amelia - 134 
##                            1                            1 
##                 Amelia - 167                 Amelia - 192 
##                            1                            1 
##                   Amelia - 2                 Amelia - 273 
##                            1                            1 
##                  Amelia - 29                  Amelia - 33 
##                            1                            1 
##                  Amelia - 40                  Amelia - 43 
##                            1                            2 
##                  Amelia - 47                   Amelia - 5 
##                            1                            1 
##                  Amelia - 69                  Big Max - 0 
##                            1                            1 
##                  Big Max - 1                 Big Max - 10 
##                            1                            1 
##               Big Max - 1065                Big Max - 159 
##                            1                            1 
##                Big Max - 230                 Big Max - 26 
##                            1                            1 
##                 Big Max - 28                 Big Max - 30 
##                            1                            1 
##                Big Max - 313                Big Max - 371 
##                            1                            1 
##                Big Max - 434                 Big Max - 86 
##                            1                            1 
##                 Big Max - 88                     Bobo - 1 
##                            1                            1 
##                   Bobo - 132                   Bobo - 162 
##                            1                            1 
##                     Bobo - 2                   Bobo - 206 
##                            2                            1 
##                    Bobo - 28                    Bobo - 31 
##                            1                            1 
##                    Bobo - 45                    Bobo - 87 
##                            1                            1 
##                    Bobo - 92                     Cory - 0 
##                            1                            1 
##                     Cory - 1                   Cory - 150 
##                            2                            1 
##                     Cory - 2                   Cory - 314 
##                            1                            1 
##                    Cory - 38                   Cory - 488 
##                            1                            1 
##                     Cory - 5                    Cory - 68 
##                            1                            1 
##                    Cory - 71                    Cory - 74 
##                            1                            1 
##                     Cory - 9           Curious George - 0 
##                            1                            1 
##         Curious George - 106         Curious George - 137 
##                            1                            1 
##         Curious George - 167          Curious George - 20 
##                            1                            1 
##         Curious George - 252         Curious George - 288 
##                            1                            1 
##          Curious George - 30         Curious George - 304 
##                            1                            1 
##         Curious George - 389           Curious George - 5 
##                            1                            1 
##           Curious George - 6           Curious George - 7 
##                            1                            1 
##          Curious George - 85                    Emily - 0 
##                            1                            1 
##                  Emily - 133                  Emily - 147 
##                            1                            1 
##                  Emily - 189                  Emily - 256 
##                            1                            1 
##                   Emily - 27                  Emily - 274 
##                            1                            1 
##                  Emily - 318                  Emily - 384 
##                            1                            1 
##                   Emily - 43                   Emily - 46 
##                            1                            1 
##                    Emily - 8                   Ferris - 0 
##                            1                            1 
##                   Ferris - 1                  Ferris - 10 
##                            2                            1 
##                 Ferris - 125                   Ferris - 2 
##                            1                            1 
##                 Ferris - 281                  Ferris - 29 
##                            1                            1 
##                  Ferris - 35                  Ferris - 46 
##                            1                            1 
##                   Ferris - 5                   Ferris - 7 
##                            1                            1 
##                  Ferris - 73                  Ferris - 74 
##                            1                            1 
##                  Ferris - 80                  Ferris - 86 
##                            1                            1 
##                 Francine - 0               Francine - 144 
##                            1                            1 
##                Francine - 17               Francine - 176 
##                            1                            1 
##               Francine - 187                Francine - 25 
##                            1                            1 
##                 Francine - 3               Francine - 335 
##                            1                            1 
##                Francine - 36                 Francine - 5 
##                            1                            1 
##                Francine - 56                Francine - 60 
##                            1                            1 
##                 Francine - 9                   Henry - 14 
##                            1                            1 
##                   Henry - 16                   Henry - 22 
##                            1                            1 
##                   Henry - 27                   Henry - 34 
##                            1                            1 
##                   Henry - 36                    Henry - 4 
##                            1                            1 
##                  Henry - 469             Jacqueline - 154 
##                            1                            1 
##             Jacqueline - 203              Jacqueline - 21 
##                            1                            1 
##             Jacqueline - 216              Jacqueline - 30 
##                            1                            1 
##              Jacqueline - 47              Jacqueline - 66 
##                            1                            2 
##               Jacqueline - 7              Jacqueline - 86 
##                            1                            1 
##              Jacqueline - 90                 Jessie - 137 
##                            1                            1 
##                  Jessie - 16                 Jessie - 219 
##                            1                            1 
##                  Jessie - 22                 Jessie - 256 
##                            1                            1 
##                 Jessie - 315                  Jessie - 32 
##                            1                            1 
##                  Jessie - 70                  Jessie - 79 
##                            1                            1 
##                    John - 10                    John - 17 
##                            1                            1 
##                   John - 208                   John - 216 
##                            1                            1 
##                    John - 22                   John - 275 
##                            1                            1 
##                    John - 30                   John - 323 
##                            1                            1 
##                   John - 368                   John - 385 
##                            1                            1 
##                    John - 46                    John - 66 
##                            1                            1 
##                    John - 85                     John - 9 
##                            1                            1 
##   Johnny James Yogurt Jr - 1 Johnny James Yogurt Jr - 140 
##                            1                            1 
## Johnny James Yogurt Jr - 157   Johnny James Yogurt Jr - 2 
##                            1                            1 
##  Johnny James Yogurt Jr - 29 Johnny James Yogurt Jr - 345 
##                            1                            1 
## Johnny James Yogurt Jr - 594  Johnny James Yogurt Jr - 77 
##                            1                            1 
##  Johnny James Yogurt Jr - 78   Johnny James Yogurt Jr - 8 
##                            1                            1 
##  Johnny James Yogurt Jr - 90                Kystystal - 0 
##                            1                            1 
##              Kystystal - 105               Kystystal - 11 
##                            1                            1 
##              Kystystal - 135              Kystystal - 197 
##                            1                            1 
##                Kystystal - 2              Kystystal - 227 
##                            1                            1 
##               Kystystal - 27               Kystystal - 40 
##                            1                            1 
##               Kystystal - 54               Kystystal - 55 
##                            1                            1 
##               Kystystal - 57              Kystystal - 625 
##                            1                            1 
##               Kystystal - 74                Kystystal - 9 
##                            1                            1 
##         Marcel the Shell - 0         Marcel the Shell - 1 
##                            2                            1 
##       Marcel the Shell - 135       Marcel the Shell - 147 
##                            1                            1 
##         Marcel the Shell - 2       Marcel the Shell - 211 
##                            1                            1 
##        Marcel the Shell - 27        Marcel the Shell - 29 
##                            1                            2 
##         Marcel the Shell - 3        Marcel the Shell - 33 
##                            1                            1 
##       Marcel the Shell - 449        Marcel the Shell - 48 
##                            1                            1 
##         Marcel the Shell - 5        Marcel the Shell - 51 
##                            1                            1 
##        Marcel the Shell - 69                    Moose - 0 
##                            1                            1 
##                    Moose - 1                   Moose - 16 
##                            1                            1 
##                  Moose - 187                  Moose - 289 
##                            1                            1 
##                  Moose - 299                  Moose - 313 
##                            1                            1 
##                  Moose - 397                  Moose - 581 
##                            1                            1 
##                   Moose - 86                 Olivia - 105 
##                            1                            1 
##                 Olivia - 139                  Olivia - 16 
##                            1                            1 
##                 Olivia - 168                 Olivia - 210 
##                            1                            1 
##                  Olivia - 29                  Olivia - 36 
##                            1                            1 
##                 Olivia - 388                  Olivia - 44 
##                            1                            1 
##                  Olivia - 52                 Olivia - 662 
##                            1                            1 
##                  Olivia - 68                  Olivia - 71 
##                            1                            1 
##                 Olivia - 870                    Orion - 0 
##                            1                            1 
##                    Orion - 1                  Orion - 115 
##                            1                            1 
##                  Orion - 117                  Orion - 145 
##                            1                            1 
##                  Orion - 148                   Orion - 16 
##                            1                            1 
##                    Orion - 2                  Orion - 250 
##                            1                            1 
##                   Orion - 28                   Orion - 32 
##                            1                            1 
##                  Orion - 328                    Orion - 5 
##                            1                            1 
##                   Orion - 94            S C Kennedy - 135 
##                            1                            1 
##            S C Kennedy - 152              S C Kennedy - 2 
##                            1                            1 
##             S C Kennedy - 35             S C Kennedy - 36 
##                            1                            1 
##             S C Kennedy - 40              S C Kennedy - 5 
##                            1                            1 
##             S C Kennedy - 72             S C Kennedy - 78 
##                            1                            1 
##                  Sarah - 102                   Sarah - 15 
##                            1                            1 
##                   Sarah - 18                  Sarah - 186 
##                            1                            1 
##                   Sarah - 30                  Sarah - 300 
##                            1                            1 
##                   Sarah - 32                  Sarah - 377 
##                            1                            1 
##                  Sarah - 459                   Sarah - 47 
##                            1                            1 
##                  Sarah - 660                     Thor - 0 
##                            1                            1 
##                   Thor - 144                   Thor - 189 
##                            1                            1 
##                    Thor - 26                     Thor - 3 
##                            1                            2 
##                    Thor - 36                     Thor - 5 
##                            1                            1 
##                    Thor - 56                     Thor - 8 
##                            1                            1 
##                     Thor - 9                    Thor - 92 
##                            1                            1 
##                    Zach - 10                   Zach - 135 
##                            1                            1 
##                   Zach - 151                    Zach - 19 
##                            1                            2 
##                    Zach - 32                    Zach - 39 
##                            1                            1 
##                     Zach - 4                   Zach - 485 
##                            2                            1 
##                     Zach - 6                    Zach - 71 
##                            1                            1
# Split each "name - number" label in X7 into a name column (X7.1) and a
# number column (X7.2).
rdat <- separate(rdat, col = X7, into = c("X7.1", "X7.2"), sep = " - ")

rdat <- as.data.frame(rdat)

# Real cube root of X2. `X2^(1/3)` evaluates to NaN for every negative value,
# and X2 is mostly negative, so take the root of |X2| and restore the sign.
rdat$X2.1 <- sign(rdat$X2) * abs(rdat$X2)^(1 / 3)
# log(X3 + 1); rows where X3 is missing stay NA.
rdat$X3.1 <- log(rdat$X3 + 1)
# The name part becomes a grouping factor, the number part a numeric column.
rdat$X7.1 <- as.factor(rdat$X7.1)
rdat$X7.2 <- as.numeric(rdat$X7.2)
glimpse(rdat)
## Observations: 300
## Variables: 15
## $ Yt   <dbl> 0.8758811, 0.8814728, 0.9824715, 1.1595656, 1.0238355, 0....
## $ Y    <dbl> 27.47078738, 23.43028592, 1.55596955, 0.02469544, 0.55493...
## $ X1   <int> 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, ...
## $ X2   <dbl> -3.0668674, -4.2638940, -2.4616331, -2.8691551, -2.199544...
## $ X3   <dbl> NA, 1.209923e+08, 9.137040e+07, 3.648866e+03, NA, 1.04338...
## $ X4   <dbl> -1.0158286, -1.2215244, -2.7545422, -4.3892816, -2.143609...
## $ X5   <dbl> 5.8467221, 3.4308835, 2.8617592, 1.4579335, 1.9219055, 1....
## $ X6   <dbl> 3.62008230, 3.62740321, 0.76221983, 0.16364930, 2.3162938...
## $ X7.1 <fct> Olivia, Cory, S C Kennedy, Big Max, Sarah, Alice, Marcel ...
## $ X7.2 <dbl> 36, 71, 2, 10, 377, 199, 51, 44, 10, 47, 90, 36, 31, 10, ...
## $ X8   <dbl> 1, 1, 9, 9, 9, 9, 1, 1, 9, 1, 1, 9, 9, 1, 1, 1, 1, 1, 1, ...
## $ X9   <dbl> -6.010369, -8.412774, 1.574063, -3.233196, -19.415281, -1...
## $ X10  <dbl> 1.71989128, 2.91232062, 2.79434218, 1.21254593, 3.3632002...
## $ X2.1 <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, Na...
## $ X3.1 <dbl> NA, 18.61123770, 18.33043215, 8.20244570, NA, 9.25290273,...
# Gray ramp with one shade per row; points are coloured by the factor code of Yt.
shade_levels <- seq(0, 0.9, length.out = nrow(rdat))
palette(gray(shade_levels))
# lm1 residuals and fitted values alongside all columns, including the new ones.
resid_frame <- cbind(R = lm1$residuals, fit = lm1$fitted.values, rdat)
pairs(
  resid_frame,
  panel = panel.smooth,
  pch = 16,
  cex = 1.2,
  col = as.factor(rdat$Yt),
  col.smooth = "skyblue4"
)

Try log(X3)

# Transformed response against log(X3); rows with missing X3 are dropped by lm().
lm2 <- lm(formula = Yt ~ log(X3), data = rdat)
summary(lm2)
## 
## Call:
## lm(formula = Yt ~ log(X3), data = rdat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24416 -0.06446  0.00145  0.06415  0.30774 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.9340958  0.0119621  78.088   <2e-16 ***
## log(X3)     0.0005542  0.0004038   1.373    0.172    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09513 on 169 degrees of freedom
##   (129 observations deleted due to missingness)
## Multiple R-squared:  0.01103,    Adjusted R-squared:  0.005176 
## F-statistic: 1.884 on 1 and 169 DF,  p-value: 0.1717
# Main effects of X7.2 and X9 plus their interaction.
lm3 <- lm(formula = Yt ~ X7.2 * X9, data = rdat)
summary(lm3)
## 
## Call:
## lm(formula = Yt ~ X7.2 * X9, data = rdat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.243503 -0.063263  0.000302  0.065959  0.310846 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.387e-01  1.066e-02  88.071   <2e-16 ***
## X7.2        -1.031e-04  2.922e-04  -0.353    0.724    
## X9          -2.309e-03  3.064e-03  -0.753    0.452    
## X7.2:X9     -1.129e-06  7.847e-06  -0.144    0.886    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09162 on 296 degrees of freedom
## Multiple R-squared:  0.003803,   Adjusted R-squared:  -0.006294 
## F-statistic: 0.3766 on 3 and 296 DF,  p-value: 0.7699
palette("default")
# lm2 was fitted only on rows where X3 is present, so its residuals and fitted
# values are shorter than rdat. Apply the same row mask to the data AND to the
# colour vector; colouring from the unfiltered X7.1 would attach colours to
# the wrong observations.
has_x3 <- !is.na(rdat$X3)
pairs(
  cbind(R = residuals(lm2), fit = fitted(lm2), rdat[has_x3, ]),
  pch = 16,
  cex = 1,
  panel = panel.smooth,
  col.smooth = "skyblue4",
  col = as.factor(rdat$X7.1[has_x3])
)

Try some interactions

# Squared X10 and squared X6, together with the product of the two squares.
lm4 <- lm(formula = Yt ~ I(X10^2) * I(X6^2), data = rdat)
summary(lm4)
## 
## Call:
## lm(formula = Yt ~ I(X10^2) * I(X6^2), data = rdat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.247008 -0.059866 -0.000932  0.064496  0.314375 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.9550130  0.0127488  74.910   <2e-16 ***
## I(X10^2)         -0.0001212  0.0017931  -0.068    0.946    
## I(X6^2)          -0.0020872  0.0016942  -1.232    0.219    
## I(X10^2):I(X6^2)  0.0001859  0.0002265   0.821    0.412    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09143 on 296 degrees of freedom
## Multiple R-squared:  0.007887,   Adjusted R-squared:  -0.002168 
## F-statistic: 0.7844 on 3 and 296 DF,  p-value: 0.5034

Give up.

I look forward to seeing how you built your data. You’ve got me stumped. Here is my final model, \(Y_i' = \beta_0 + \epsilon_i\) where \(Y' = Y^{-0.04}\) (the `Yt` column used in the fit below).

# Intercept-only model for the transformed response: it estimates just the mean of Yt.
lm5 <- lm(formula = Yt ~ 1, data = rdat)
summary(lm5)
## 
## Call:
## lm(formula = Yt ~ 1, data = rdat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.243435 -0.065319  0.000573  0.065887  0.316059 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.948460   0.005273   179.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09133 on 299 degrees of freedom